# Jordan Tam, PhD Thesis research
# University of British Columbia, CA
# Prepared by Tim Waring
# for publication
# 2020.08


# Social Learning Game and survey analysis

library(tidyverse)
library(broom)
library(reshape2)
library(gridExtra)
library(ggridges)

# Sections
# - Game Structure
# - Deck Choices
# - Certainty & Correctness
# - Social Info Use
# - Social Info Use correlates



## GAME STRUCTURE

# Load the per-round payoff table for the two decks.
game_structure <- read_csv("/Users/twaring/Documents/Research/CSP/data/game choices.csv")
# select file: "game choices.csv"

# Keep the first four columns, then derive running totals and the payoff gap
# between deck A and deck B, both in absolute terms and as a fraction of A.
game_structure <- game_structure[, 1:4] %>%
	mutate(
		A_cum = cumsum(A),
		B_cum = cumsum(B),
		diff = A - B,
		cum_diff = A_cum - B_cum,
		diff_frac = diff / A,
		cum_diff_frac = cum_diff / A_cum
	)
head(game_structure)

# Mean per-round gap (A - B) expressed as a fraction of A's payoff
# (original note: A is on average 21% greater than B)
mean(game_structure$diff_frac, na.rm = TRUE)

# Two-sample t-test of the A payoffs against the B payoffs
t.test(x=game_structure$A, y=game_structure$B)


# Payoffs over time (A dashed, B solid)
ggplot(game_structure, aes(x = round)) +
	geom_line(aes(y = A), linetype = 2) +
	geom_line(aes(y = B)) +
	geom_point(aes(y = A)) +
	geom_point(aes(y = B)) +
	ylab("payoff") +
	xlab("round") +
	ylim(c(0, 225))

# cumulative payoffs (A dashed with open points, B solid)
ggplot(game_structure, aes(x = round)) +
	geom_line(aes(y = A_cum), linetype = 2) +
	geom_line(aes(y = B_cum)) +
	geom_point(aes(y = A_cum), shape = 21) +
	geom_point(aes(y = B_cum)) +
	ylab("cumulative payoff") +  # axis label typo ("sumulative") corrected
	xlab("round")

# difference in payoff
ggplot(game_structure, aes(x = round)) +
	geom_line(aes(y = diff)) +
	geom_point(aes(y = diff)) +
	ylab("difference in payoff") +
	xlab("round")

# cumulative difference in payoff
cd_pay <- ggplot(game_structure, aes(x = round)) +
	geom_line(aes(y = cum_diff)) +
	geom_point(aes(y = cum_diff), shape = 21) +
	ylab("cumulative payoff difference\n ( HD - LD )") +
	xlab("round")
cd_pay

# cumulative fractional difference in payoff
# (axis label typo "cumultative" corrected; this panel goes into the saved figure)
cfd_pay <- ggplot(game_structure, aes(x = round)) +
	geom_line(aes(y = cum_diff_frac)) +
	geom_point(aes(y = cum_diff_frac), shape = 21) +
	ylab("cumulative fractional difference \n in payoff: ( HD - LD ) / HD") +
	xlab("round")
cfd_pay

# double plot: both cumulative panels side by side, written to PDF
struct_plot <- grid.arrange(cd_pay, cfd_pay, nrow = 1)
ggsave("f2_pay_struct.pdf", struct_plot, width = 6, height = 2.5)







## DECK CHOICES

# Load the game/survey table.
SLG <- read_csv("/Users/twaring/Documents/Research/CSP/data/SLG_Basic.csv")
# select file: "Survey 2 2020.06.22.csv"
# NOTE(review): the file name in the comment above differs from the path that
# is actually read (SLG_Basic.csv) - confirm which one is current.

head(SLG)
str(SLG)

# Individual trajectories: one row per participant per round.
# Drops the RdBuy*/Rnd*/High* columns and columns 3-7, stacks the RdColo*
# columns into (round, highdeck), and blanks the "9999" code so that it
# becomes NA on numeric conversion.
itrajs <- SLG %>%
	select(-starts_with("RdBuy"), -starts_with("Rnd"), -starts_with("High"), -c(3:7)) %>%
	gather(starts_with("RdColo"), key = "round", value = "highdeck") %>%
	mutate(
		round = as.numeric(str_replace(round, "RdColo", "")),
		highdeck = as.numeric(str_replace(highdeck, "9999", ""))
	)

# n: number of rows per syndicate in round 1
itrajs %>%
	filter(round == 1) %>%
	group_by(Synd_name) %>%
	summarize(n())

# 1 Algarrobo     20
# 2 CoVentanas     3
# 3 El Manzano    14
# 4 El Quisco     15
# 5 Horcon        21
# 6 Los Molles    10
# 7 Papudo        19
# 8 Pichicuy       5
# 9 Quintay       10
# 10 Ventanas       5

# Syndicate trajectories: mean high-deck pick rate per round and syndicate,
# leaving out Pichicuy and CoVentanas.
strajs <- itrajs %>%
	filter(Synd_name != "Pichicuy", Synd_name != "CoVentanas") %>%
	group_by(round, Synd_name) %>%
	summarise(mean_high = mean(highdeck, na.rm = TRUE))

# Global trajectory: mean pick rate per round over every participant,
# labelled "Global" and stacked on top of the per-syndicate rows.
global_means <- itrajs %>%
	group_by(round) %>%
	summarize(mean_high = mean(highdeck, na.rm = TRUE))

alltrajs <- global_means %>%
	mutate(Synd_name = "Global") %>%
	bind_rows(strajs)

# Display labels carrying the per-syndicate counts
syndicate_labels <- c(
	"Algarrobo (20)", "El Manzano (14)", "El Quisco (15)", "Horcon (21)",
	"Los Molles (10)", "Papudo (19)", "Quintay (10)", "Ventanas (5)", "Global"
)


# Global trajectory
# Restrict to the "Global" rows once so that the plot and the regression
# describe the same data. (alltrajs also holds the per-syndicate means;
# regressing on all of it mixes those rows into the global trend.)
global_traj <- filter(alltrajs, Synd_name == "Global")

hd_pick_plot <- ggplot(global_traj, aes(x = round, y = mean_high)) +
	geom_smooth(se = FALSE, method = lm, color = "grey75", lwd = .5) +
	geom_line() +
	geom_point(shape = 21) +
	ylab("high deck pick rate") +
	geom_hline(yintercept = 0.5, linetype = 2)
hd_pick_plot

# Linear trend of the global high-deck pick rate over rounds
summary(lm(mean_high ~ round, data = global_traj))

# Name the plot explicitly rather than relying on "last plot displayed"
ggsave("f3a_glob_traj.pdf", plot = hd_pick_plot, width = 4, height = 4)


# Syndicate trajectories
# One facet per syndicate (Ventanas removed) plus the global series, each with
# a linear trend and a dashed reference line at a pick rate of 0.5.
synd_traj_plot <- alltrajs %>%
	mutate(Synd_name = factor(Synd_name, levels = c("Algarrobo", "El Manzano", "El Quisco", "Horcon", "Los Molles", "Papudo", "Quintay", "Ventanas", "Global"))) %>%
	filter(Synd_name != "Ventanas") %>%
	ggplot(aes(x = round, y = mean_high)) +
	geom_smooth(method = "lm", se = FALSE) +
	geom_line() +
	geom_point(shape = 21) +
	facet_wrap(~Synd_name) +
	# was geom_hline(yintercept = 0.5, geom_line()): a layer object was being
	# passed as the `mapping` argument; draw the dashed line used elsewhere
	geom_hline(yintercept = 0.5, linetype = 2) +
	ylab("high deck pick rate")
synd_traj_plot

ggsave("fx7_Traj_synd.pdf", plot = synd_traj_plot, width = 4, height = 4)




## DOES LEARNING OCCUR?

## Certainty

# Load the self-reported certainty table.
cert <- read_csv("/Users/twaring/Documents/Research/CSP/data/certainty.csv")
# select file: "certainty.csv"

# Stack the seven Rnd*_cert columns into (testround, certainty).
# testround keeps only the round number and is stored as a factor;
# "9999" and "No se" answers are blanked so they turn into NA when
# certainty is converted to numeric.
cert <- cert %>%
	gather(Rnd1_cert, Rnd5_cert, Rnd10_cert, Rnd15_cert, Rnd20_cert, Rnd25_cert, Rnd30_cert, key = "testround", value = "certainty") %>%
	mutate(
		testround = str_replace(testround, "Rnd", ""),
		testround = str_replace(testround, "_cert", ""),
		testround = as.factor(as.numeric(testround)),
		certainty = str_replace(certainty, "9999", ""),
		certainty = str_replace(certainty, "No se", ""),
		certainty = as.numeric(certainty)
	)

str(cert)
max(cert$certainty, na.rm = TRUE)

# boxplot of self-reported certainty for each test round
# (the original put certainty on x but labelled the y axis "self reported
# certainty"; the discrete round now sits on x so the labels match the axes)
ggplot(cert, aes(x = testround, y = certainty)) +
	geom_boxplot() +
	xlab("round") +
	ylab("self reported certainty")


# ridge plot - 25% and 75% quartiles
library(ggridges)

# old: ridges with quantile lines at 25/50/75%
certainty_plot <- ggplot(cert, aes(x = certainty, y = testround)) +
	geom_density_ridges(
		scale = 1.5,
		rel_min_height = .03,
		quantile_lines = TRUE,
		quantiles = c(0.25, 0.5, 0.75),
		alpha = 0.8
	) +
	xlim(0, 100) +
	scale_fill_gradient(low = "white", high = "darkgray") +
	ylab("round") +
	xlab("self reported certainty") +
	geom_vline(xintercept = 50, linetype = 2) +
	coord_flip()
certainty_plot


# new: ridges filled by quantile band (below 25%, 25-75%, above 75%);
# this assignment replaces the "old" plot stored under the same name
certainty_plot <- ggplot(cert, aes(x = certainty, y = testround, fill = factor(stat(quantile)))) +
	stat_density_ridges(
		geom = "density_ridges_gradient",
		calc_ecdf = TRUE,
		quantiles = c(0.25, 0.75),
		show.legend = FALSE,
		rel_min_height = .03
	) +
	scale_fill_manual(values = c("#A0A0A0A0", "#616161", "#A0A0A0A0")) +
	xlim(0, 100) +
	ylab("round") +
	xlab("self reported certainty") +
	geom_vline(xintercept = 50, linetype = 2) +
	coord_flip()
certainty_plot




## Correctness

# Stated deck guesses at the seven test rounds, scored against High_colour.
# correct is 1 when the guess equals the lower-cased High_colour, else 0.
# NOTE(review): "9999" and "No se" guesses are blanked to "" and therefore
# score 0 (not NA) whenever High_colour is non-empty - confirm this is intended.
deckpicks <- SLG %>%
	select(-starts_with("RdColo"), -starts_with("RdBuy"), -c(3:7), -ends_with("_cert")) %>%
	gather(Rnd1_colo, Rnd5_colo, Rnd10_colo, Rnd15_colo, Rnd20_colo, Rnd25_colo, Rnd30_colo, key = "testround", value = "deckpick") %>%
	mutate(
		testround = str_replace(testround, "Rnd", ""),
		testround = str_replace(testround, "_colo", ""),
		testround = as.factor(as.numeric(testround)),
		deckpick = str_replace(deckpick, "9999", ""),
		deckpick = str_replace(deckpick, "No se", ""),
		High_colour = str_to_lower(str_replace(High_colour, "9999", "")),
		correct = if_else(deckpick == High_colour, 1, 0)
	)

# Plot of correctness of guess: fraction correct per test round,
# with a dashed line at 0.5
correct_by_round <- deckpicks %>%
	group_by(testround) %>%
	summarize(mean_correct = mean(correct), sd_correct = sd(correct))

correctness_plot <- ggplot(correct_by_round, aes(x = testround, y = mean_correct)) +
	geom_point(shape = 21, size = 2, stroke = 1) +
	xlab("round") +
	ylab("fraction correct") +
	ylim(c(0.45, .7)) +
	geom_hline(yintercept = 0.5, linetype = 2)
correctness_plot



# TRIPLE plot of pick rate, correctness, and certainty, written to PDF
pickcorcert <- grid.arrange(hd_pick_plot, correctness_plot, certainty_plot, nrow = 1)
ggsave("f3_pick_correct_cert.pdf", plot = pickcorcert, width = 7, height = 2.5)













## HOW IS SOCIAL INFO USED?

# Re-read the game/survey table for the social-information analysis.
SLG <- read_csv("/Users/twaring/Documents/Research/CSP/data/SLG_Basic.csv")

# Long table of social-information purchases: one row per participant per
# round. The "9999" code is blanked so it becomes NA on numeric conversion.
sl <- SLG %>%
	select(-starts_with("RdColo"), -starts_with("Rnd"), -starts_with("High")) %>%
	gather(starts_with("RdBuy"), key = "round", value = "buy") %>%
	mutate(
		round = str_replace(round, "RdBuy", ""),
		buy = as.numeric(str_replace(buy, "9999", "")),
		round = as.numeric(round)
	)

str(sl)


# global social learning over time: mean and SD of `buy` per round
gsl <- sl %>%
	group_by(round) %>%
	summarize(
		slrate = mean(buy, na.rm = TRUE),
		slsd = sd(buy, na.rm = TRUE)
	) %>%
	mutate(Synd_name = "Global")



# Proportion CIs
# Half-width (EBP) of a normal-approximation confidence interval for a
# proportion, evaluated at the mean per-round access rate and then applied
# as a constant band around every round's rate.
n <- 122  # hard-coded sample size; presumably the participant count - confirm
p <- mean(gsl$slrate, na.rm = TRUE)
CL <- .99  # confidence level
a2 <- (1 - CL) / 2
z <- qnorm(1 - a2)
EBP <- z * sqrt(p * (1 - p) / n)
EBP


# Access rate per round with the +/- EBP ribbon
slrateplot <- ggplot(data = gsl, aes(x = round, y = slrate)) +
	geom_ribbon(aes(ymin = slrate - EBP, ymax = slrate + EBP), fill = "grey70", alpha = 0.7) +
	geom_line() +
	geom_point(shape = 21) +
	ylab("social information access rate") +
	ylim(0, 0.75)
slrateplot



	# Access rate per round with a +/- 1 SD ribbon (replaces the CI version of
	# slrateplot). The lower edge is clamped at 0 per round with pmax(); the
	# earlier max(slrate - slsd, .3) - .3 used scalar max(), which collapses
	# the whole lower edge to a single constant.
	slrateplot <- ggplot(data = gsl, aes(x = round, y = slrate)) +
		geom_ribbon(aes(ymin = pmax(slrate - slsd, 0), ymax = slrate + slsd), fill = "grey70", alpha = 0.7) +
		geom_line() +
		geom_point(shape = 21) +
		ylab("social information access rate")
	slrateplot

	## social info use histogram

	# Rebuild the long purchase table, this time also converting the per-player
	# Game_* totals to numeric. The "9999" code is blanked first so it becomes
	# NA; Game_right is now scrubbed the same way as the other Game_* columns.
	sl <- SLG %>%
		select(-starts_with("RdColo"), -starts_with("Rnd"), -starts_with("High")) %>%
		gather(starts_with("RdBuy"), key = "round", value = "buy") %>%
		mutate(round = str_replace(round, "RdBuy", "")) %>%
		mutate(buy = str_replace(buy, "9999", "")) %>%
		mutate(Game_buycount = str_replace(Game_buycount, "9999", "")) %>%
		mutate(Game_payoff = str_replace(Game_payoff, "9999", "")) %>%
		mutate(Game_win = str_replace(Game_win, "9999", "")) %>%
		mutate(Game_right = str_replace(Game_right, "9999", "")) %>%
		mutate(Game_buycount = as.numeric(Game_buycount)) %>%
		mutate(Game_payoff = as.numeric(Game_payoff)) %>%
		mutate(Game_win = as.numeric(Game_win)) %>%
		mutate(Game_right = as.numeric(Game_right)) %>%
		mutate(buy = as.numeric(buy)) %>%
		mutate(round = as.numeric(round))

	# Game_buycount is repeated on every round's row, so keep round 1 only to
	# count each participant once.
	socinfo_hist <- ggplot(data = filter(sl, round == 1), aes(Game_buycount)) +
		geom_histogram(color = "black", fill = "grey") +
		xlab("social information access")
	socinfo_hist

	# Arrange (and draw) the two panels once, then save that same object
	si_overview <- grid.arrange(socinfo_hist, slrateplot, nrow = 1)
	ggsave("f4_siuse_overview.pdf", plot = si_overview, width = 6, height = 2.5)




	# social info use by syndicate
	# Per-participant purchase rate, shown as one box per syndicate.
	# (Filters on Pichicuy / CoVentanas / Ventanas were left disabled here.)
	per_player_rate <- sl %>%
		group_by(Synd_name, Survey_num) %>%
		summarize(slrate = mean(buy, na.rm = TRUE), slsd = sd(buy, na.rm = TRUE))

	ggplot(per_player_rate, aes(x = Synd_name, y = slrate)) +
		geom_boxplot()


	# social learning by syndicate over TIME
	# Purchase rate per round within each syndicate (three syndicates removed),
	# with the global series appended; faceted with a +/- 1 SD ribbon.
	synd_levels <- c("Algarrobo", "El Manzano", "El Quisco", "Horcon", "Los Molles", "Papudo", "Quintay", "Global")

	sl %>%
		filter(Synd_name != "Pichicuy", Synd_name != "CoVentanas", Synd_name != "Ventanas") %>%
		group_by(round, Synd_name) %>%
		summarize(slrate = mean(buy, na.rm = TRUE), slsd = sd(buy, na.rm = TRUE)) %>%
		bind_rows(gsl) %>%
		transform(Synd_name = factor(Synd_name, levels = synd_levels)) %>%
		ggplot(aes(x = round, y = slrate)) +
		geom_ribbon(aes(ymin = slrate - slsd, ymax = slrate + slsd), fill = "grey70", alpha = 0.7) +
		geom_line() +
		geom_point(shape = 21) +
		ylab("social information access rate") +
		facet_wrap(~ Synd_name)



# Social info use by factor
# One row per participant with the per-game totals as numerics. The "9999"
# code is blanked first so it becomes NA; Game_right is now scrubbed like the
# other Game_* columns, since it feeds a regression further down.
siu <- SLG %>%
	select(-starts_with("RdColo"), -starts_with("Rnd"), -starts_with("High"), -starts_with("RdBuy"), -Game_cost) %>%
	mutate(Game_buycount = str_replace(Game_buycount, "9999", "")) %>%
	mutate(Game_payoff = str_replace(Game_payoff, "9999", "")) %>%
	mutate(Game_win = str_replace(Game_win, "9999", "")) %>%
	mutate(Game_right = str_replace(Game_right, "9999", "")) %>%
	mutate(Game_buycount = as.numeric(Game_buycount)) %>%
	mutate(Game_payoff = as.numeric(Game_payoff)) %>%
	mutate(Game_win = as.numeric(Game_win)) %>%
	mutate(Game_right = as.numeric(Game_right))


# NET game winnings against number of social-information buys
net <- ggplot(siu, aes(x = Game_buycount, y = Game_win)) +
	geom_point(shape = 21) +
	geom_smooth(method = "lm") +
	xlab("social information access") +
	ylab("net earnings")
net

# GROSS game payoff against number of buys
pay <- ggplot(siu, aes(x = Game_buycount, y = Game_payoff)) +
	geom_point(shape = 21) +
	geom_smooth(method = "lm") +
	xlab("social information access") +
	ylab("payoffs")
pay

# Linear fit of gross payoff on buy count
reg <- lm(siu$Game_payoff ~ siu$Game_buycount)
summary(reg)


# Correct deck choices against number of buys
right <- ggplot(siu, aes(x = Game_buycount, y = Game_right)) +
	geom_point(shape = 21) +
	geom_smooth(method = "lm") +
	xlab("social information access") +
	ylab("HP deck choices")
right

# Linear fit of correct choices on buy count (reuses the name `reg`)
reg <- lm(siu$Game_right ~ siu$Game_buycount)
summary(reg)

# three-pane plot of net earnings, payoffs, and correct choices
three_pane <- grid.arrange(net, pay, right, nrow = 1)
three_pane
ggsave("f5_si_impact.pdf", plot = three_pane, width = 7, height = 2.2)









 # DOES SOC LEARN INFLUENCE CHOICES (over time if not between people?)

# Re-read the game/survey table for the lagged analysis.
SLG <- read_csv("/Users/twaring/Documents/Research/CSP/data/SLG_Basic.csv")

# Deck choice per participant and round (RdColo* columns stacked);
# "9999" is blanked so it becomes NA on numeric conversion.
choices <- SLG %>%
	select(-c(2:7), -starts_with(c("Rnd", "High", "RdBuy"))) %>%
	gather(starts_with("RdColo"), key = "round", value = "choice") %>%
	mutate(
		round = str_replace(round, "RdColo", ""),
		choice = as.numeric(str_replace(choice, "9999", "")),
		round = as.numeric(round)
	)
choices

# Social-information purchase per participant and round (RdBuy* columns
# stacked), cleaned the same way.
buys <- SLG %>%
	select(-c(2:7), -starts_with(c("Rnd", "High", "RdColo"))) %>%
	gather(starts_with("RdBuy"), key = "round", value = "buy") %>%
	mutate(
		round = str_replace(round, "RdBuy", ""),
		buy = as.numeric(str_replace(buy, "9999", "")),
		round = as.numeric(round)
	)
buys

# Combine purchases and choices, then build each player's running history.
# lag() and cumsum() depend on row order within a player, so rows are sorted
# by round as well as by Survey_num instead of relying on the order in which
# gather() happened to stack the columns.
#   lag_buy    - purchase in the previous round (0 for a player's first row)
#   buy_cum    - running count of purchases
#   samples    - buy_cum + round
#   choice_cum - running sum of `choice`
unified <- full_join(buys, choices) %>%
	arrange(Survey_num, round) %>%
	group_by(Survey_num) %>%
	mutate(lag_buy = lag(buy, n = 1, default = 0)) %>%
	mutate(buy_cum = cumsum(buy)) %>%
	mutate(samples = buy_cum + round) %>%
	mutate(choice_cum = cumsum(choice)) %>%
	ungroup() %>%
	# convert to factors once, after ungrouping, so levels are built over the
	# whole column rather than group by group
	mutate(lag_buy = as.factor(lag_buy)) %>%
	mutate(Survey_num = as.factor(Survey_num))
unified



# DOES USING SOCIAL INFO LAST ROUND IMPROVE CHOICES THIS ROUND?
# Nope.

# Calculate LAGGED social info use by round
# compare with correctness across players
# (`choice` is NA where the source held 9999, so every mean below drops NAs,
# matching the na.rm handling used for the other per-round summaries)

unified %>%
	group_by(round, lag_buy) %>%
	summarize(freq_correct = mean(choice, na.rm = TRUE)) %>%
	ggplot(aes(x = round, y = freq_correct, color = lag_buy)) +
	geom_line() +
	geom_point()


# Per-round mean of `choice` among players who bought in the previous round
bought <- unified %>%
	filter(lag_buy == 1) %>%
	group_by(round) %>%
	summarize(freq_correct_soc = mean(choice, na.rm = TRUE))

# Per-round mean of `choice` among players who did not buy in the previous round
didnt <- unified %>%
	filter(lag_buy == 0) %>%
	group_by(round) %>%
	summarize(freq_correct_ind = mean(choice, na.rm = TRUE))

# slben: difference between the two rates, keeping every round present in `didnt`
lagbuybump <- bought %>%
	right_join(didnt, by = "round") %>%
	mutate(slben = freq_correct_soc - freq_correct_ind)

lag_buy_plot <- ggplot(lagbuybump, aes(x = round, y = slben)) +
	geom_bar(stat = "identity") +
	ylab("increase in correctness \nafter buying social information")
lag_buy_plot

ggsave("lag_buy_bump.pdf", plot = lag_buy_plot, width = 5, height = 3)

# benefit of lagged social information use generally negative across rounds.





# Bring in the model data
models <- read_csv("/Users/twaring/Documents/Research/CSP/data/models.csv")

# Stack the player* columns into (player, highdeck) with a numeric player id
models <- models %>%
	gather(starts_with("player"), key = "player", value = "highdeck") %>%
	mutate(player = as.numeric(str_replace(player, "player", "")))

#  create model choice stream, and lagged model choices
# player 7 was selected as behavioral model
model <- models %>%
	filter(player == 7) %>%
	transmute(round = Round, model = highdeck) %>%
	mutate(lagmod = lag(model))

mean(model$model)
# 0.5
# model only chose correctly 50% of the time


# join model with choice data
# same = 1 when the player's choice equals the model's previous-round choice,
# 0 when it differs (NA in round 1, where the model has no previous choice).
lag_buy_same <- unified %>%
	left_join(model, by = "round") %>%
	group_by(round, lag_buy) %>%
	mutate(same = 1 - abs(lagmod - choice))
lag_buy_same

# Per-round match rate for players who did not buy in the previous round.
# NAs in `same` (missing choices) are dropped, as in the other summaries.
didnt_buy <- lag_buy_same %>%
	filter(lag_buy == 0) %>%
	summarize(freq_same_didnt = mean(same, na.rm = TRUE)) %>%
	ungroup() %>%
	select(-lag_buy)
didnt_buy

# Per-round match rate for players who bought in the previous round
bought <- lag_buy_same %>%
	filter(lag_buy == 1) %>%
	summarize(freq_same_bought = mean(same, na.rm = TRUE)) %>%
	ungroup() %>%
	select(-lag_buy)
bought

# similarity_bump: match rate after buying minus match rate after not buying
lag_buy_sim <- didnt_buy %>%
	left_join(bought, by = "round") %>%
	mutate(similarity_bump = freq_same_bought - freq_same_didnt)
lag_buy_sim


ggplot(lag_buy_sim, aes(x = round, y = similarity_bump)) +
	geom_bar(stat = "identity") +
	ylab("increase in similarity \nafter buying social information")

# who knows. Doesn't look likely.








# DOES RELATIONSHIP BETWEEN SOC INFO ACCESS AND CORRECTNESS VARY BETWEEN PEOPLE?
# yes.

# For each player and each level of cumulative purchases, the mean of
# choice_cum / round.
d <- unified %>%
	group_by(Survey_num, buy_cum) %>%
	summarize(freq_correct = mean(choice_cum / round, na.rm = TRUE))

# One linear fit per player. A player needs at least two complete
# (buy_cum, freq_correct) rows for a slope to exist, so incomplete rows are
# not counted. Coefficients are taken by name via coef(): indexing
# summary(model)$coeff[2] positionally returns the intercept's standard error
# whenever the slope is dropped from the coefficient table.
ind_slopes <- d %>%
	group_by(Survey_num) %>%
	filter(sum(!is.na(buy_cum) & !is.na(freq_correct)) >= 2) %>%
	do(model = lm(freq_correct ~ buy_cum, data = .)) %>%
	mutate(
		intercept = coef(model)[["(Intercept)"]],
		slope = coef(model)[["buy_cum"]]
	) %>%
	select(-model)

# Distribution of the per-player slopes, with a dashed line at zero
indiv_slopes_plot <- ggplot(ind_slopes, aes(x = slope)) +
	geom_histogram(color = "black", fill = "grey") +
	xlab("slope of cum info use on correctness") +
	geom_vline(xintercept = 0, linetype = 2)
indiv_slopes_plot


# Two-pane figure: lagged-buy benefit by round next to the per-player slopes.
# grid.arrange() needs plot objects, so the first panel is lag_buy_plot
# (the bar chart), not lagbuybump (the data frame it is drawn from).
two_pane <- grid.arrange(lag_buy_plot, indiv_slopes_plot, nrow = 1)
two_pane
ggsave("f6_soc_learn_vary.pdf", plot = two_pane, width = 6, height = 2.5)


# random effects interaction model
# - regress freq_correct_choice on buy_cum with random effects for player
# buy_cum enters as a fixed effect as well as a by-player random slope;
# with only (buy_cum | Survey_num) the average slope is forced to zero and
# any overall trend is absorbed into the random-slope variance.
library(lme4)
summary(lmer(freq_correct ~ buy_cum + (buy_cum | Survey_num), data = d))





